{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1a222eb38b796cb1",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/arize-ai/phoenix/blob/main/tutorials/experiments/run_experiments_with_llama_index.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "initial_id",
   "metadata": {},
   "outputs": [],
   "source": [
    "!uv pip install -Uqq arize-phoenix \"torch<2.7\" sentence-transformers openinference-instrumentation-llama_index openinference-instrumentation-openai llama-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f222fd97",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import tempfile\n",
    "from datetime import datetime, timezone\n",
    "from functools import partial\n",
    "from getpass import getpass\n",
    "from time import sleep\n",
    "from typing import Any\n",
    "from urllib.request import urlretrieve\n",
    "\n",
    "import pandas as pd\n",
    "from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex\n",
    "from llama_index.core.chat_engine import ContextChatEngine\n",
    "from llama_index.core.node_parser import SentenceSplitter\n",
    "from llama_index.core.postprocessor import SentenceTransformerRerank\n",
    "from llama_index.core.response_synthesizers import get_response_synthesizer\n",
    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from openinference.instrumentation.llama_index import LlamaIndexInstrumentor\n",
    "from openinference.instrumentation.openai import OpenAIInstrumentor\n",
    "\n",
    "import phoenix as px\n",
    "from phoenix.client import Client\n",
    "from phoenix.evals import (\n",
    "    OpenAIModel,\n",
    ")\n",
    "from phoenix.otel import register\n",
    "\n",
    "pd.set_option(\"display.max_colwidth\", None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6518f480fc2c324f",
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.getenv(\"OPENAI_API_KEY\"):\n",
    "    os.environ[\"OPENAI_API_KEY\"] = getpass(\"🔑 Enter your OpenAI API key: \")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "87caccef77f34e23",
   "metadata": {},
   "source": [
    "# Set Up Instrumentation\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7531386",
   "metadata": {},
   "outputs": [],
   "source": [
    "px.launch_app()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cecb97dda1f7ff2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "tracer_provider = register()\n",
    "LlamaIndexInstrumentor().instrument(skip_dep_check=True, tracer_provider=tracer_provider)\n",
    "OpenAIInstrumentor().instrument(skip_dep_check=True, tracer_provider=tracer_provider)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ae5bb85777bad1bf",
   "metadata": {},
   "source": [
    "# Create Dataset\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb9a30f5b63b1b83",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(\n",
    "    {\n",
    "        \"input_messages\": [\n",
    "            [{\"role\": \"user\", \"content\": \"Which grad schools did the author apply for and why?\"}],\n",
    "            [{\"role\": \"user\", \"content\": \"What did the author do growing up?\"}],\n",
    "        ],\n",
    "        \"output_message\": [\n",
    "            {\n",
    "                \"role\": \"assistant\",\n",
    "                \"content\": \"The author applied to three grad schools: MIT and Yale, which were renowned for AI at the time, and Harvard, which the author had visited because a friend went there and it was also home to Bill Woods, who had invented the type of parser the author used in his SHRDLU clone. The author chose these schools because he wanted to learn about AI and Lisp, and these schools were known for their expertise in these areas.\",\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"assistant\",\n",
    "                \"content\": \"The author took a painting class at Harvard with Idelle Weber and later became her de facto studio assistant. Additionally, the author worked on several different projects, including writing essays, developing spam filters, and painting.\",\n",
    "            },\n",
    "        ],\n",
    "    }\n",
    ")\n",
    "for c in (\"input_messages\", \"output_message\"):\n",
    "    df[c] = df[c].apply(json.dumps).astype(\"string\")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ca2ff2bb5c1e13f9",
   "metadata": {},
   "source": [
    "## Upload Dataset\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e955fb85754f5c87",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_name = datetime.now(timezone.utc).isoformat()\n",
    "phoenix_client = Client()\n",
    "\n",
    "phoenix_client.datasets.create_dataset(\n",
    "    name=dataset_name,\n",
    "    dataframe=df,\n",
    "    input_keys=(\"input_messages\",),\n",
    "    output_keys=(\"output_message\",),\n",
    ")\n",
    "sleep(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0134d2057cddfbd",
   "metadata": {},
   "source": [
    "## Download Dataset\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30a54e1924e8e192",
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = phoenix_client.datasets.get_dataset(dataset=dataset_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d2b64da7a25505e5",
   "metadata": {},
   "source": [
    "# Set Up LLamaIndex\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fc9f72b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configure models\n",
    "experiment_metadata = {\n",
    "    \"llm\": \"gpt-4\",\n",
    "    \"embed_model\": \"text-embedding-3-small\",\n",
    "    \"reranker\": \"cross-encoder/ms-marco-MiniLM-L-2-v2\",\n",
    "}\n",
    "Settings.llm = OpenAI(model=experiment_metadata[\"llm\"])\n",
    "Settings.embed_model = OpenAIEmbedding(model=experiment_metadata[\"embed_model\"])\n",
    "reranker = SentenceTransformerRerank(model=experiment_metadata[\"reranker\"], top_n=2)\n",
    "\n",
    "# Load and chunk document\n",
    "print(\"📚 Loading and chunking document...\")\n",
    "essay = \"https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt\"\n",
    "\n",
    "with tempfile.NamedTemporaryFile() as tf:\n",
    "    urlretrieve(essay, tf.name)\n",
    "    # Load document\n",
    "    documents = SimpleDirectoryReader(input_files=[tf.name]).load_data()\n",
    "    print(f\"📄 Loaded {len(documents)} document(s)\")\n",
    "\n",
    "    # Create text chunks using sentence splitter\n",
    "    parser = SentenceSplitter(chunk_size=512, chunk_overlap=50)\n",
    "    nodes = parser.get_nodes_from_documents(documents)\n",
    "    print(f\"📑 Created {len(nodes)} chunks\")\n",
    "\n",
    "# Create index\n",
    "print(\"\\n🔍 Creating index...\")\n",
    "index = VectorStoreIndex(nodes)\n",
    "print(\"✅ Index created\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c48adfa7",
   "metadata": {},
   "source": [
    "# Create Task\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b28e3bb2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def rag_with_reranker(input) -> str:\n",
    "    try:\n",
    "        # Parse query\n",
    "        messages = input[\"input_messages\"]\n",
    "        messages = json.loads(messages)\n",
    "        query = messages[-1][\"content\"]\n",
    "\n",
    "        # Create retriever with reranking\n",
    "        retriever = index.as_retriever(similarity_top_k=5, node_postprocessors=[reranker])\n",
    "\n",
    "        # Create response synthesizer\n",
    "        response_synthesizer = get_response_synthesizer(response_mode=\"compact\")\n",
    "\n",
    "        # Create context chat engine explicitly\n",
    "        chat_engine = ContextChatEngine.from_defaults(\n",
    "            retriever=retriever,\n",
    "            response_synthesizer=response_synthesizer,\n",
    "            system_prompt=(\n",
    "                \"You are a helpful assistant. Base your response ONLY on the provided context. \"\n",
    "                \"If you cannot find the answer in the context, say 'I cannot find that information \"\n",
    "                \"in the provided context.' Include specific details from the context in your response.\"\n",
    "            ),\n",
    "        )\n",
    "\n",
    "        # Get response\n",
    "        response = chat_engine.chat(query)\n",
    "        return str(response)\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"❌ Error: {str(e)}\")\n",
    "        import traceback\n",
    "\n",
    "        traceback.print_exc()\n",
    "        return f\"Error: {str(e)}\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abbfc5c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test the RAG pipeline directly\n",
    "test_input = {\n",
    "    \"input_messages\": json.dumps(\n",
    "        [{\"role\": \"user\", \"content\": \"Which grad schools did the author apply for?\"}]\n",
    "    )\n",
    "}\n",
    "print(\"🧪 Testing RAG pipeline...\")\n",
    "result = rag_with_reranker(test_input)\n",
    "print(f\"\\n🎯 Final result: {result}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c632ee87",
   "metadata": {},
   "source": [
    "# Define Evaluators\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6bcf19fe4ed8d089",
   "metadata": {},
   "outputs": [],
   "source": [
    "def contains_substring(output, substring: str) -> dict[str, Any]:\n",
    "    score = int(isinstance(output, str) and substring in output)\n",
    "    return {\n",
    "        \"score\": score,\n",
    "        \"explanation\": f\"the substring `{substring}` was in the output\",\n",
    "    }"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cd6aadf43899bf6",
   "metadata": {},
   "source": [
    "# Run Experiment\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb12a5d4fe07387d",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = OpenAIModel(model=\"gpt-4o\")\n",
    "\n",
    "experiment = phoenix_client.experiments.run_experiment(\n",
    "    dataset=ds,\n",
    "    task=rag_with_reranker,\n",
    "    experiment_metadata=experiment_metadata,\n",
    "    evaluators=[partial(contains_substring, substring=\"school\")],\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
